FullConnection

传入输入矩阵与权重矩阵,执行全连接计算(矩阵乘法),并可选择性地叠加偏置项与激活函数, 最终输出结果矩阵。

\[ \begin{aligned} dst_{i,j} &= \sum_{k=0}^{K-1} A_{i,k} \cdot B_{k,j} + bias_{i,j} \\ C_{i,j} &= activation(dst_{i,j}) \end{aligned} \]

其中激活函数支持 ReLU 与 ReLU6。

输入:
  • A - 输入矩阵地址,形状为 M × K

  • B - 权重矩阵地址,形状为 K × N

  • bias - 偏置矩阵地址,形状为 M × N,可为 NULL

  • params - 参数打包成 long long 数组,各字段按下标顺序(params[0] ~ params[7])依次存放,结构如下:
    • M - 输出矩阵行数。

    • N - 输出矩阵列数。

    • K - 中间维度大小。

    • activation_type - 激活函数类型。

    • A_transpose - 是否转置输入矩阵A(0: 不转置, 1: 转置)。

    • B_transpose - 是否转置权重矩阵B(0: 不转置, 1: 转置)。

    • t_A - 转置后输入矩阵A的地址(仅当A_transpose=1时有效)。

    • t_B - 转置后权重矩阵B的地址(仅当B_transpose=1时有效)。

  • core_mask - 核掩码(仅适用于共享存储版本)。

输出:
  • C - 输出矩阵地址,形状为 M × N

支持平台:

FT78NE、MT7004

备注:

  • FT78NE 支持fp, int8

  • MT7004 支持hp, fp

  • activation_type 支持 ACTIVATION_NONE、ACTIVATION_RELU、ACTIVATION_RELU6

共享存储版本:

void i8_full_connection_s(int8_t *A, int8_t *B, int8_t *C, int8_t *bias, long long *params, int core_mask)
void fp_full_connection_s(float *A, float *B, float *C, float *bias, long long *params, int core_mask)
void hp_full_connection_s(half *A, half *B, half *C, half *bias, long long *params, int core_mask)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <fullconnection.h>
 4
 5int main(int argc, char* argv[]) {
 6    float* A_ref = (float*)0x81000000;
 7    float* B_ref = (float*)0x82000000;
 8    float* C_ref = (float*)0x83000000;
 9    float* bias_ref = (float*)0x84000000;
10
11    float* C_single = (float*)0x85000000;
12
13    int M = 8;
14    int N = 8;
15    int K = 8;
16
17    bool A_transpose = true;
18    bool B_transpose = false;
19    float *t_A = (float*)0x10060000;
20    float *t_B = (float*)0x10070000;
21
22    // Initialize A, B, bias with small values
23    for (i = 0; i < M * K; ++i) {
24        A_ref[i] = (float)(i % 10) * 0.1f;
25    }
26    for (i = 0; i < K * N; ++i) {
27        B_ref[i] = (float)(i % 10) * 0.1f;
28    }
29    for (i = 0; i < M * N; ++i) {
30        C_ref[i] = 0.0f;
31        C_single[i] = 0.0f;
32        bias_ref[i] = (float)(i % 5) * 0.01f;
33    }
34
35    long long params[10];
36
37    params[0] = (long long)M;
38    params[1] = (long long)N;
39    params[2] = (long long)K;
40    params[3] = (long long)ACTIVATION_RELU;
41    params[4] = (long long)A_transpose;
42    params[5] = (long long)B_transpose;
43    params[6] = (long long)t_A;
44    params[7] = (long long)t_B;
45
46    int core_mask = 0xff;
47    fp_full_connection_s(A_ref, B_ref, C_single, bias_ref, params, core_mask);
48    return 0;
49}

私有存储版本:

void i8_full_connection_p(int8_t *A, int8_t *B, int8_t *C, int8_t *bias, long long *params)
void fp_full_connection_p(float *A, float *B, float *C, float *bias, long long *params)
void hp_full_connection_p(half *A, half *B, half *C, half *bias, long long *params)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <fullconnection.h>
 4
 5int main(int argc, char* argv[]) {
 6    float* A_ref = (float*)0x10010000;
 7    float* B_ref = (float*)0x10020000;
 8    float* C_ref = (float*)0x10030000;
 9    float* bias_ref = (float*)0x10040000;
10
11    float* C_single = (float*)0x10050000;
12
13    int M = 8;
14    int N = 8;
15    int K = 8;
16
17    bool A_transpose = true;
18    bool B_transpose = false;
19    float *t_A = (float*)0x10060000;
20    float *t_B = (float*)0x10070000;
21
22    // Initialize A, B, bias with small values
23    for (i = 0; i < M * K; ++i) {
24        A_ref[i] = (float)(i % 10) * 0.1f;
25    }
26    for (i = 0; i < K * N; ++i) {
27        B_ref[i] = (float)(i % 10) * 0.1f;
28    }
29    for (i = 0; i < M * N; ++i) {
30        C_ref[i] = 0.0f;
31        C_single[i] = 0.0f;
32        bias_ref[i] = (float)(i % 5) * 0.01f;
33    }
34
35    long long params[10];
36
37    params[0] = (long long)M;
38    params[1] = (long long)N;
39    params[2] = (long long)K;
40    params[3] = (long long)ACTIVATION_RELU;
41    params[4] = (long long)A_transpose;
42    params[5] = (long long)B_transpose;
43    params[6] = (long long)t_A;
44    params[7] = (long long)t_B;
45
46    fp_full_connection_p(A_ref, B_ref, C_single, bias_ref, params);
47}